# -*- coding: utf-8 -*-
import scrapy
import sys
# Python 2 only: site.py deletes sys.setdefaultencoding at startup, so the
# module has to be reloaded to get it back. Forcing the default encoding to
# UTF-8 lets implicit str/unicode conversions of non-ASCII text succeed.
# Python 3 has no builtin reload() and no sys.setdefaultencoding(), and its
# default encoding is already UTF-8, so the hack is skipped there instead of
# failing at import time with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')

from blog_crawl.items import MiniHacksItem

class MinihacksSpider(scrapy.Spider):
    """Spider that collects post headers from the mindhacks.cn index pages.

    Each ``//article/header`` node found on a start URL is turned into one
    ``MiniHacksItem`` carrying the post's title, link and datetime attribute.
    """

    name = 'minihacks'
    allowed_domains = ['mindhacks.cn']
    # The index pages to crawl are listed explicitly; parse() yields items
    # only, so no further pagination links are followed.
    start_urls = [
        'http://mindhacks.cn/',
        'http://mindhacks.cn/page/2/',
        'http://mindhacks.cn/page/3/',
        'http://mindhacks.cn/page/4/',
        'http://mindhacks.cn/page/5/',
    ]

    def parse(self, response):
        """Yield one ``MiniHacksItem`` per article header in *response*.

        XPath expressions, relative to each ``//article/header`` node:

        * title:    ``h2[@class="entry-title"]/a/@title``
        * link:     ``h2[@class="entry-title"]/a/@href``
        * datetime: ``h5/div/span/a/time/@datetime``

        ``title`` is stored as a single UTF-8 encoded string, whereas ``link``
        and ``datetime`` are stored as the lists returned by ``extract()``.
        Headers that have no title attribute are logged and skipped.
        """
        for sel in response.xpath('//article/header'):
            titles = sel.xpath('h2[@class="entry-title"]/a/@title').extract()
            if not titles:
                # Indexing an empty result would raise IndexError and abort
                # the generator, losing every remaining item on this page.
                self.log('Skipping article header without title on %s'
                         % response.url)
                continue

            item = MiniHacksItem()
            item['title'] = titles[0].encode('utf-8')
            item['link'] = sel.xpath('h2[@class="entry-title"]/a/@href').extract()
            item['datetime'] = sel.xpath('h5/div/span/a/time/@datetime').extract()
            yield item
